{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create anchor boxes\n",
    "\n",
    "This is the **SsdAnchorsCalculator** stage from the MediaPipe graph. It computes a list of anchor boxes. This only needs to be done once, so we will store these anchors into a lookup table.\n",
    "\n",
    "\n",
    "Using conda environnement:\n",
    "```\n",
    "conda create -c pytorch -c conda-forge -n BlazeConv 'pytorch=1.6' jupyter opencv matplotlib\n",
    "```\n",
    "```\n",
    "conda activate BlazeConv\n",
    "```\n",
    "```\n",
    "pip install tflite\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "These are the options from [face_detection_mobile_gpu.pbtxt](https://github.com/google/mediapipe/blob/master/mediapipe/graphs/face_detection/face_detection_mobile_gpu.pbtxt).\n",
    "\n",
    "To understand what these options mean, see [ssd_anchors_calculator.proto](https://github.com/google/mediapipe/blob/master/mediapipe/calculators/tflite/ssd_anchors_calculator.proto)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "anchor_options = {\n",
    "    \"num_layers\": 4,\n",
    "    \"min_scale\": 0.1484375,\n",
    "    \"max_scale\": 0.75,\n",
    "    \"input_size_height\": 128,\n",
    "    \"input_size_width\": 128,\n",
    "    \"anchor_offset_x\": 0.5,\n",
    "    \"anchor_offset_y\": 0.5,\n",
    "    \"strides\": [8, 16, 16, 16],\n",
    "    \"aspect_ratios\": [1.0],\n",
    "    \"reduce_boxes_in_lowest_layer\": False,\n",
    "    \"interpolated_scale_aspect_ratio\": 1.0,\n",
    "    \"fixed_anchor_size\": True,\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "These are the options from [face_detection_back_mobile_gpu.pbtxt](https://github.com/google/mediapipe/blob/master/mediapipe/graphs/face_detection/face_detection_back_mobile_gpu.pbtxt)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "anchor_back_options = {\n",
    "    \"num_layers\": 4,\n",
    "    \"min_scale\": 0.15625,\n",
    "    \"max_scale\": 0.75,\n",
    "    \"input_size_height\": 256,\n",
    "    \"input_size_width\": 256,\n",
    "    \"anchor_offset_x\": 0.5,\n",
    "    \"anchor_offset_y\": 0.5,\n",
    "    \"strides\": [16, 32, 32, 32],\n",
    "    \"aspect_ratios\": [1.0],\n",
    "    \"reduce_boxes_in_lowest_layer\": False,\n",
    "    \"interpolated_scale_aspect_ratio\": 1.0,\n",
    "    \"fixed_anchor_size\": True,\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is a literal translation of [ssd_anchors_calculator.cc](https://github.com/google/mediapipe/blob/master/mediapipe/calculators/tflite/ssd_anchors_calculator.cc):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calculate_scale(min_scale, max_scale, stride_index, num_strides):\n",
    "    return min_scale + (max_scale - min_scale) * stride_index / (num_strides - 1.0)\n",
    "\n",
    "\n",
    "def generate_anchors(options):\n",
    "    strides_size = len(options[\"strides\"])\n",
    "    assert options[\"num_layers\"] == strides_size\n",
    "\n",
    "    anchors = []\n",
    "    layer_id = 0\n",
    "    while layer_id < strides_size:\n",
    "        anchor_height = []\n",
    "        anchor_width = []\n",
    "        aspect_ratios = []\n",
    "        scales = []\n",
    "\n",
    "        # For same strides, we merge the anchors in the same order.\n",
    "        last_same_stride_layer = layer_id\n",
    "        while (last_same_stride_layer < strides_size) and \\\n",
    "              (options[\"strides\"][last_same_stride_layer] == options[\"strides\"][layer_id]):\n",
    "            scale = calculate_scale(options[\"min_scale\"],\n",
    "                                    options[\"max_scale\"],\n",
    "                                    last_same_stride_layer,\n",
    "                                    strides_size)\n",
    "\n",
    "            if last_same_stride_layer == 0 and options[\"reduce_boxes_in_lowest_layer\"]:\n",
    "                # For first layer, it can be specified to use predefined anchors.\n",
    "                aspect_ratios.append(1.0)\n",
    "                aspect_ratios.append(2.0)\n",
    "                aspect_ratios.append(0.5)\n",
    "                scales.append(0.1)\n",
    "                scales.append(scale)\n",
    "                scales.append(scale)                \n",
    "            else:\n",
    "                for aspect_ratio in options[\"aspect_ratios\"]:\n",
    "                    aspect_ratios.append(aspect_ratio)\n",
    "                    scales.append(scale)\n",
    "\n",
    "                if options[\"interpolated_scale_aspect_ratio\"] > 0.0:\n",
    "                    scale_next = 1.0 if last_same_stride_layer == strides_size - 1 \\\n",
    "                                     else calculate_scale(options[\"min_scale\"],\n",
    "                                                          options[\"max_scale\"],\n",
    "                                                          last_same_stride_layer + 1,\n",
    "                                                          strides_size)\n",
    "                    scales.append(np.sqrt(scale * scale_next))\n",
    "                    aspect_ratios.append(options[\"interpolated_scale_aspect_ratio\"])\n",
    "\n",
    "            last_same_stride_layer += 1\n",
    "\n",
    "        for i in range(len(aspect_ratios)):\n",
    "            ratio_sqrts = np.sqrt(aspect_ratios[i])\n",
    "            anchor_height.append(scales[i] / ratio_sqrts)\n",
    "            anchor_width.append(scales[i] * ratio_sqrts)            \n",
    "            \n",
    "        stride = options[\"strides\"][layer_id]\n",
    "        feature_map_height = int(np.ceil(options[\"input_size_height\"] / stride))\n",
    "        feature_map_width = int(np.ceil(options[\"input_size_width\"] / stride))\n",
    "\n",
    "        for y in range(feature_map_height):\n",
    "            for x in range(feature_map_width):\n",
    "                for anchor_id in range(len(anchor_height)):\n",
    "                    x_center = (x + options[\"anchor_offset_x\"]) / feature_map_width\n",
    "                    y_center = (y + options[\"anchor_offset_y\"]) / feature_map_height\n",
    "\n",
    "                    new_anchor = [x_center, y_center, 0, 0]\n",
    "                    if options[\"fixed_anchor_size\"]:\n",
    "                        new_anchor[2] = 1.0\n",
    "                        new_anchor[3] = 1.0\n",
    "                    else:\n",
    "                        new_anchor[2] = anchor_width[anchor_id]\n",
    "                        new_anchor[3] = anchor_height[anchor_id]\n",
    "                    anchors.append(new_anchor)\n",
    "\n",
    "        layer_id = last_same_stride_layer\n",
    "\n",
    "    return anchors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "anchors = generate_anchors(anchor_options)\n",
    "\n",
    "assert len(anchors) == 896\n",
    "\n",
    "anchors_back = generate_anchors(anchor_back_options)\n",
    "\n",
    "assert len(anchors_back) == 896\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Each anchor is `[x_center, y_center, width, height]` in normalized coordinates. For our use case, the width and height are always 1."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[0.03125, 0.03125, 1.0, 1.0],\n",
       " [0.03125, 0.03125, 1.0, 1.0],\n",
       " [0.09375, 0.03125, 1.0, 1.0],\n",
       " [0.09375, 0.03125, 1.0, 1.0],\n",
       " [0.15625, 0.03125, 1.0, 1.0],\n",
       " [0.15625, 0.03125, 1.0, 1.0],\n",
       " [0.21875, 0.03125, 1.0, 1.0],\n",
       " [0.21875, 0.03125, 1.0, 1.0],\n",
       " [0.28125, 0.03125, 1.0, 1.0],\n",
       " [0.28125, 0.03125, 1.0, 1.0]]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "anchors[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[0.03125, 0.03125, 1.0, 1.0],\n",
       " [0.03125, 0.03125, 1.0, 1.0],\n",
       " [0.09375, 0.03125, 1.0, 1.0],\n",
       " [0.09375, 0.03125, 1.0, 1.0],\n",
       " [0.15625, 0.03125, 1.0, 1.0],\n",
       " [0.15625, 0.03125, 1.0, 1.0],\n",
       " [0.21875, 0.03125, 1.0, 1.0],\n",
       " [0.21875, 0.03125, 1.0, 1.0],\n",
       " [0.28125, 0.03125, 1.0, 1.0],\n",
       " [0.28125, 0.03125, 1.0, 1.0]]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "anchors_back[:10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the \"FaceDetectionConfig\" test case from the MediaPipe repo:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of errors: 0\n"
     ]
    }
   ],
   "source": [
    "anchor_options_test = {\n",
    "    \"num_layers\": 5,\n",
    "    \"min_scale\": 0.1171875,\n",
    "    \"max_scale\": 0.75,\n",
    "    \"input_size_height\": 256,\n",
    "    \"input_size_width\": 256,\n",
    "    \"anchor_offset_x\": 0.5,\n",
    "    \"anchor_offset_y\": 0.5,\n",
    "    \"strides\": [8, 16, 32, 32, 32],\n",
    "    \"aspect_ratios\": [1.0],\n",
    "    \"reduce_boxes_in_lowest_layer\": False,\n",
    "    \"interpolated_scale_aspect_ratio\": 1.0,\n",
    "    \"fixed_anchor_size\": True,\n",
    "}\n",
    "\n",
    "anchors_test = generate_anchors(anchor_options_test)\n",
    "anchors_golden = np.loadtxt(\"./mediapipe/mediapipe/calculators/tflite/testdata/anchor_golden_file_0.txt\")\n",
    "\n",
    "assert len(anchors_test) == len(anchors_golden)\n",
    "print(\"Number of errors:\", (np.abs(anchors_test - anchors_golden) > 1e-5).sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the \"MobileSSDConfig\" test case from the MediaPipe repo:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of errors: 0\n"
     ]
    }
   ],
   "source": [
    "anchor_options_test = {\n",
    "    \"num_layers\": 6,\n",
    "    \"min_scale\": 0.2,\n",
    "    \"max_scale\": 0.95,\n",
    "    \"input_size_height\": 300,\n",
    "    \"input_size_width\": 300,\n",
    "    \"anchor_offset_x\": 0.5,\n",
    "    \"anchor_offset_y\": 0.5,\n",
    "    \"strides\": [16, 32, 64, 128, 256, 512],\n",
    "    \"aspect_ratios\": [1.0, 2.0, 0.5, 3.0, 0.3333],\n",
    "    \"reduce_boxes_in_lowest_layer\": True,\n",
    "    \"interpolated_scale_aspect_ratio\": 1.0,\n",
    "    \"fixed_anchor_size\": False,\n",
    "}\n",
    "\n",
    "anchors_test = generate_anchors(anchor_options_test)\n",
    "anchors_golden = np.loadtxt(\"./mediapipe/mediapipe/calculators/tflite/testdata/anchor_golden_file_1.txt\")\n",
    "\n",
    "assert len(anchors_test) == len(anchors_golden)\n",
    "print(\"Number of errors:\", (np.abs(anchors_test - anchors_golden) > 1e-5).sum())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save the anchors to a file:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.save(\"anchors.npy\", anchors)\n",
    "np.save(\"anchorsback.npy\", anchors_back)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
